 # Run main.py once per dataset, in the order listed, always with
 # --model osum, --split test, --modality audio and --id 4.
 #
 # The list is kept as a space-separated string (not an array) so the script
 # stays runnable under a plain POSIX sh; dataset names must therefore not
 # contain whitespace or glob characters.
 # NOTE(review): the meaning of "--id 4" is defined by main.py and is not
 # visible from this script; confirm before changing it.
 datasets="advbench alpacaeval commoneval"

 # $datasets is intentionally left unquoted here: word splitting is what turns
 # the string into individual dataset names.
 for dataset in $datasets; do
     echo "Testing dataset: $dataset"
     # "$dataset" is quoted so the name is always passed to --data as exactly
     # one argument. A failed run does not stop the loop; the script's exit
     # status is that of the last run.
     python main.py \
         --model osum \
         --data "$dataset" \
         --split test \
         --modality audio \
         --id 4
 done

# datasets="openbookqa ifeval advbench"
# for dataset in $datasets; do
#     echo "Testing dataset: $dataset"
#     python main.py \
#         --model osum \
#         --data $dataset \
#         --split test \
#         --modality audio \
#         --id 3
# done

# echo "Testing dataset: sd-qa"
# python main.py \
#     --model osum \
#     --data sd-qa \
#     --split aus \
#     --modality audio \
#     --id 3

#tests="law engineering other biology business economics health philosophy psychology history chemistry physics"
## tests="business economics health philosophy psychology history chemistry physics"
#for test in $tests; do
#    echo "Testing test: $test"
#    python main.py \
#        --model osum \
#        --data mmsu \
#        --split $test \
#        --modality audio \
#        --id 3
#done
